#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
import sys, argparse
import csv
import time as t
import datetime as d
import numpy as np
import pandas as pd
'''
Convert the leaf information produced by a GBDT model
into FM-format features.
'''

def parse_args():
    """Parse the command line and return the arguments as a dict.

    The returned dict has the keys ``'leaf'``, ``'start_idx'`` and
    ``'output'``; every value is the raw string given on the command line.

    When the script is invoked without any argument the help text is
    printed and the process exits (via ``SystemExit``), exactly as if
    ``-h`` had been passed.
    """
    parser = argparse.ArgumentParser(
        description='Convert GBDT leaf information into FM-format features.')
    parser.add_argument(
        'leaf',
        help='input file; each line holds whitespace-separated integer '
             'leaf ids')
    parser.add_argument(
        'start_idx',
        help='integer offset added to every generated feature index')
    parser.add_argument(
        'output',
        help='file the "index:1" feature lines are written to')

    # Fall back to "-h" on a bare invocation without touching sys.argv
    # itself, so the global argument list is left as the caller set it.
    argv = sys.argv[1:] or ['-h']
    return vars(parser.parse_args(argv))

def _max_leaf_id(path):
    """Return the largest leaf id found in the file at *path* (at least 0).

    Every line is split on whitespace and each token is parsed as an
    integer. Blank lines are skipped so they cannot abort the scan.
    """
    largest = 0
    with open(path) as src:
        for line in src:
            leaf_ids = [int(tok) for tok in line.split()]
            if leaf_ids:
                largest = max(largest, max(leaf_ids))
    return largest


def _write_fm_features(leaf_path, output_path, start_idx, num_of_leafs):
    """Write one line of space-separated ``index:1`` features per input line.

    For the leaf id at position ``i`` of a line the feature index is
    ``start_idx + num_of_leafs * i + leaf_id``. A blank input line yields
    an empty output line, so input and output stay aligned row by row.
    """
    # NOTE(review): the per-position stride is the largest leaf id. If ids
    # can be 0, id == num_of_leafs at position i maps to the same index as
    # id == 0 at position i + 1 -- confirm the ids are 1-based.
    with open(leaf_path) as src, open(output_path, 'w') as dst:
        for line in src:
            leaf_ids = [int(tok) for tok in line.split()]
            features = ' '.join(
                '%d:1' % (start_idx + num_of_leafs * pos + leaf_id)
                for pos, leaf_id in enumerate(leaf_ids))
            dst.write(features + '\n')


args = parse_args()
leaf = args['leaf']
start_idx = int(args['start_idx'])
output = args['output']

# First pass: find the largest leaf id, used as the index stride.
num_of_leafs = _max_leaf_id(leaf)

# Each message is built as a single string so the line is valid both as a
# Python 2 print statement and as a Python 3 print() call.
print('num_of_leafs:\t%s' % num_of_leafs)

# Second pass: emit the features.
_write_fm_features(leaf, output, start_idx, num_of_leafs)
print('write to  %s' % output)
